rm(list = ls())

## 

library(tidyverse)
library(pbapply)
library(rdrobust)

## Get federal election data 

bt <- readRDS('data/data_federal.rds') %>%
  filter(!is.na(treated))

## List of outcomes

outcomes <- c('agg_left_party', 
              'left_party',
              'greens_party')

# Mean and SD of vote variables per year

out_df <- bt %>% 
  filter(between(pop_dec_09, 10000 - 1000, 10000 + 1000)) %>% 
  filter(applies_census == 1) %>% 
  filter(!state_id == '08') %>% 
  mutate(weight  = 1 - (abs(10000 - pop_dec_09) / 1000)) %>% 
  group_by(year, treated) %>% 
  summarise_at(vars(one_of(outcomes)),
               list(~weighted.mean(., weight, na.rm = T))) %>% 
  ungroup() %>% 
  pivot_longer(cols = one_of(outcomes), names_to = 'outcome', values_to = 'mean') %>% 
  mutate(treated = ifelse(treated == 1, 'Treated', 'Control'))

# Some renaming

out_df <- out_df %>% 
  mutate(treated = ifelse(treated == 'Treated', 
                          'Population below 10,000 (treated)',
                          'Population above 10,000 (control)')) %>% 
  mutate(outcome = case_when(outcome == 'left_party'~ 'Left party',
                             outcome == 'greens_party'~ 'Greens',
                             outcome == 'agg_left_party'~ 'Left-wing parties\n(Left party & greens)')) %>% 
  mutate(outcome = factor(outcome, levels = unique(outcome)[c(1,2,3)])) 

# Figure A.2: trends in voting behavior ----

paper_plot <- out_df %>% 
  ggplot(aes(year, mean, treated)) +
  geom_vline(xintercept = 2014, linetype = 'dotted') +
  geom_line(aes(linetype = treated)) +
  geom_point(aes(shape = treated), fill = 'white') +
  theme_bw() +
  facet_wrap(~outcome) +
  ylab('Vote share (%)') +
  xlab('Election') +
  scale_x_continuous(breaks = c(2009, 2013, 2017)) +
  theme(legend.position = 'bottom') +
  scale_linetype_discrete(name = '') +
  scale_shape_manual(name = '', values = c(21, 22)) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
  theme(legend.key.width = unit(1, 'cm'))

paper_plot

## Next, estimate main models conditional on pre-treatment differences

covs <- c('turnout_party_2009_btw', 'soz_vers_beschaeftigte_share',
          'pop_density_km2', 'migration_out_share')

## Prep
## First difference between 2009 and 2013 (i.e. prior to treatment)

diff_df <- pblapply(outcomes, function(o) {
  out <- bt %>%
    filter(year %in% c(2009, 2013)) %>%
    pivot_wider(values_from = o, names_from = 'year', id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2013  - o2009) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(bt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1) %>% 
  filter(!state_id == '08')

## Def weight /  kernel function

kern <- function(x, bw) {
  
  z <- x/bw
  k <- ifelse(abs(z) < 1, 1-abs(z), 0)
  
}

## Source helper function to tidy rdrobust output

source("code/tidy_rd.R")

## Greens + Left

out_agg_pretreat <- rdrobust(y = diff_df %>% 
                               pull(agg_left_party), 
                             x = diff_df$runvar, 
                             c = 0)

out_greens_pretreat <- rdrobust(y = diff_df %>% 
                                  pull(greens_party), 
                                x = diff_df$runvar, 
                                c = 0)

out_left_pretreat <- rdrobust(y = diff_df %>% 
                                pull(left_party), 
                              x = diff_df$runvar, 
                              c = 0)

## List

results_rdd <- list(out_agg_pretreat,
                    out_greens_pretreat,
                    out_left_pretreat) %>% 
  lapply(tidy_rd, se_nr = 3) %>% 
  reduce(rbind)

## Round and drop some vars

results_rdd <- results_rdd %>% 
  mutate_at(vars(estimate, std.error, matches('conf'),
                 p.value, bw_left_h),
            list(~round(., 2))) %>% 
  mutate(bw_left_h =floor(bw_left_h)) %>% 
  dplyr::select(estimate, std.error, matches('conf'),
                p.value, bw_left_h, n) %>% 
  mutate(outcome = c("Left-wing parties", "Greens", "Left party"))

# Table A.4: effect on pre-treatment trends ----

results_rdd

## Get first differenced df for the main effect 
## Ie difference pre- and post-treatment

diff_df_main <- pblapply(outcomes, function(o) {
  out <- bt %>%
    filter(year %in% c(2013, 2017)) %>%
    pivot_wider(values_from = o, names_from = 'year', id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2017  - o2013) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(bt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1) %>% 
  filter(!state_id == '08')

## Merge pre-treatment changes to this df
## This allows use to later control for pre-treatment changes

diff_df_pre <- diff_df %>% 
  dplyr::select(ags, agg_left_party, left_party, greens_party) %>% 
  dplyr::rename_with(.cols = -ags, .fn = ~paste0(., '_pre'))

## Merge

diff_df_main <- diff_df_main %>% 
  left_join(diff_df_pre)

## Run main models
## Second model controls for pre-treatment differences

out_agg1 <- rdrobust(y = diff_df_main %>% 
                       pull(agg_left_party), 
                     x = diff_df_main$runvar, 
                     c = 0,
                     covs = diff_df_main[, c(covs)])

out_agg2 <- rdrobust(y = diff_df_main %>% 
                       pull(agg_left_party), 
                     x = diff_df_main$runvar, 
                     c = 0,
                     covs = diff_df_main[, c(covs, 'agg_left_party_pre')])

## Tidy 

results_rdd <- list(out_agg1, out_agg2) %>% 
  lapply(tidy_rd, se_nr = 3) %>% 
  reduce(rbind) %>% 
  mutate(outcome = 'Both')

## Drop some vars, round

results_rdd <- results_rdd %>% 
  mutate_at(vars(estimate, std.error, matches('conf'),
                 p.value, bw_left_h),
            list(~round(., 2))) %>% 
  mutate(bw_left_h =floor(bw_left_h))

# Table A.5: Main effect controlling for pre-treatment trends ----

results_rdd %>% 
  dplyr::select(estimate, std.error, matches('conf'),
                p.value, bw_left_h, n)

